{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Feature: Meta-Features Using Pairwise Diffs/Ratios"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Generate additional meta-features derived from pairwise differences and ratios of most important features (helpful for tree-based models)."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Imports"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This utility package imports `numpy`, `pandas`, `matplotlib` and a helper `kg` module into the root namespace."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from pygoose import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import itertools"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Config"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Automatically discover the paths to various data folders and compose the project structure."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "project = kg.Project.discover()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Identifier for storing these features on disk and referring to them later."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "feature_list_id = 'meta_pairwise_interactions'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Feature lists containing the most important features."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "feature_lists = [\n",
    "    # Place feature lists here.\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Names of the features to use for pairwise interactions."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "most_important_features = [\n",
    "    # Place features here.\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Read data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_train, df_test, _ = project.load_feature_lists(feature_lists)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Build features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "feature_names = []"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "for a, b in itertools.combinations(most_important_features, 2):\n",
    "    diff_feature_name = f'diff_{a}_{b}'\n",
    "    df_train[diff_feature_name] = df_train[a] - df_train[b]\n",
    "    df_test[diff_feature_name] = df_test[a] - df_test[b]\n",
    "    feature_names.append(diff_feature_name)\n",
    "    \n",
    "    ratio_feature_name = f'ratio_{a}_{b}'\n",
    "    df_train[ratio_feature_name] = df_train[a] / df_train[b]\n",
    "    df_test[ratio_feature_name] = df_test[a] / df_test[b]\n",
    "    feature_names.append(ratio_feature_name)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Save features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "features_to_keep = feature_names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train = df_train[features_to_keep].values\n",
    "X_test = df_test[features_to_keep].values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "project.save_features(X_train, X_test, feature_names, feature_list_id)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  },
  "widgets": {
   "state": {
    "068574a6e5cf4bfb8a8766f228dc3517": {
     "views": [
      {
       "cell_index": 28
      }
     ]
    },
    "09c14bf4363c4816ac7017c9965fc1bd": {
     "views": [
      {
       "cell_index": 28
      }
     ]
    },
    "15522c84f8174a5e9751c5c25803fcf5": {
     "views": [
      {
       "cell_index": 23
      }
     ]
    },
    "21cc92d9f9a64f92a6043a0c8a12a657": {
     "views": [
      {
       "cell_index": 23
      }
     ]
    },
    "21eeeabc43bb4199a1048aa4584d353c": {
     "views": [
      {
       "cell_index": 23
      }
     ]
    },
    "25e9905debef47fcb005c8d696680424": {
     "views": [
      {
       "cell_index": 23
      }
     ]
    },
    "2b0e90300a1f4ab28be065bbc3864b80": {
     "views": [
      {
       "cell_index": 23
      }
     ]
    },
    "2d1133ed035d49a68c98a6b18b178656": {
     "views": [
      {
       "cell_index": 23
      }
     ]
    },
    "394af33ab0d044268d4eeb73139f7b37": {
     "views": [
      {
       "cell_index": 23
      }
     ]
    },
    "3e588e54a6414b95ba816c41081e31d6": {
     "views": [
      {
       "cell_index": 28
      }
     ]
    },
    "4bba185d13a14d698a61f3e3519c63fd": {
     "views": [
      {
       "cell_index": 23
      }
     ]
    },
    "679a1f080a154ceeb9e2609a1ce71704": {
     "views": [
      {
       "cell_index": 28
      }
     ]
    },
    "6e1256e2abe74c0ab4b0dc5ddf2b7bc6": {
     "views": [
      {
       "cell_index": 28
      }
     ]
    },
    "8c7b849999b147aa9a6a101c1a53e338": {
     "views": [
      {
       "cell_index": 23
      }
     ]
    },
    "8dd895d7431b4967aa0c4d03c46c8127": {
     "views": [
      {
       "cell_index": 28
      }
     ]
    },
    "916040f5b64345abaac3f238416ab1c9": {
     "views": [
      {
       "cell_index": 28
      }
     ]
    },
    "9987ad0d05564b4f95e00b2ea39765df": {
     "views": [
      {
       "cell_index": 23
      }
     ]
    },
    "a2d1c3889f0e45fdbec97496fc4ad878": {
     "views": [
      {
       "cell_index": 28
      }
     ]
    },
    "aff6ddc72148443da10a73512c0982a1": {
     "views": [
      {
       "cell_index": 28
      }
     ]
    },
    "b1ff32f69dae4892a732a8adef6344fc": {
     "views": [
      {
       "cell_index": 28
      }
     ]
    },
    "b4e83e55092a4250ba23a885609e8400": {
     "views": [
      {
       "cell_index": 28
      }
     ]
    },
    "b98e54779e534a9689ac3aaffbfd5a6c": {
     "views": [
      {
       "cell_index": 23
      }
     ]
    },
    "bad51d2b7d1c427e86c66dadb743fc4b": {
     "views": [
      {
       "cell_index": 23
      }
     ]
    },
    "c2e66ab21ab84c709264eef184db1509": {
     "views": [
      {
       "cell_index": 23
      }
     ]
    },
    "c42eaf61b2604250928b0d505c4c6413": {
     "views": [
      {
       "cell_index": 28
      }
     ]
    },
    "cd66b0f3b4bf49a5bcd1db0a2c3f452d": {
     "views": [
      {
       "cell_index": 28
      }
     ]
    },
    "d5c8503f198d4a0d856487c566a5ae43": {
     "views": [
      {
       "cell_index": 28
      }
     ]
    },
    "e124726908104334ba3324eb86f09bc4": {
     "views": [
      {
       "cell_index": 23
      }
     ]
    },
    "eef21b8574c04fb4a0ee907064b8a635": {
     "views": [
      {
       "cell_index": 23
      }
     ]
    },
    "fda965ad56df4353b6e963d29e3f1700": {
     "views": [
      {
       "cell_index": 23
      }
     ]
    }
   },
   "version": "1.2.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
